# Get data from Github
import numpy as np
from math import sqrt
from sklearn.metrics import mean_squared_error
import pandas as pd
#url_1 = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
url_1 = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
# Fix: `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines='skip'` is the direct replacement (skip malformed rows).
confirmed = pd.read_csv(url_1, on_bad_lines='skip')
#url_2 = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Deaths.csv'
url_2 = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
death = pd.read_csv(url_2, on_bad_lines='skip')
#url_3 = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Recovered.csv'
url_3 = 'https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
recover = pd.read_csv(url_3, on_bad_lines='skip')
# fix region names: normalise country labels so all three frames agree
for _frame in (confirmed, death, recover):
    _frame['Country/Region'] = (
        _frame['Country/Region']
        .str.replace("Mainland China", "China")
        .str.replace("US", "United States")
    )
confirmed.iloc[:, :]
# Attach population figures (left join keeps rows with no population match).
population = pd.read_csv('/home/notebookuser/notebooks/covid19/population.csv', sep=',', encoding='latin1')
confirmed = confirmed.merge(population, how='left', on=['Province/State', 'Country/Region'])
death = death.merge(population, how='left', on=['Province/State', 'Country/Region'])
recover = recover.merge(population, how='left', on=['Province/State', 'Country/Region'])
# merge region confirmed + death + recover
# Build a unique key per row: "<country>_<province>" (missing province -> "nan").
for _frame in (confirmed, death, recover):
    _frame['region'] = _frame['Country/Region'].map(str) + '_' + _frame['Province/State'].map(str)
confirmed.iloc[:, :]
# merge region death
death.iloc[175:185, :]
# merge region recover
recover.iloc[175:185, :]
confirmed.iloc[175:185, :]
def create_ts(df):
    """Pivot a wide JHU frame (one row per region, one column per date) into a
    time series: one row per date, one column per region key.

    Drops the metadata columns, transposes so dates become the index, promotes
    the 'region' row to the column header, fills gaps with 0 and sorts the
    columns alphabetically.
    """
    ts = df.drop(['Province/State', 'Country/Region', 'Lat', 'Long', ' Population '], axis=1)
    # (the original also called ts.set_index('region') without assigning the
    # result - a no-op, removed)
    ts = ts.T
    ts.columns = ts.loc['region']
    ts = ts.drop('region')
    ts = ts.fillna(0)
    ts = ts.reindex(sorted(ts.columns), axis=1)
    return ts
## JOAO - Fix - Drop Duplicates # Keep Last # Issue With Data source Change from John Hopkins institute
# Deduplicate on the region key (keep the newest row) before pivoting.
ts = create_ts(confirmed.drop_duplicates(subset=['region'], keep='last'))
ts_d = create_ts(death.drop_duplicates(subset=['region'], keep='last'))
ts_rec = create_ts(recover.drop_duplicates(subset=['region'], keep='last'))
import matplotlib.pyplot as plt

# Rank regions once by peak confirmed count and reuse that order for the
# death and recovered panels so all three share the same column ordering.
_rank = ts.max().sort_values(ascending=False).index
p = ts.reindex(_rank, axis=1)
p.iloc[:, :1].plot(marker='*', figsize=(20, 8)).set_title('Daily Total Confirmed - Top World Region ', fontdict={'fontsize': 22})
p.iloc[:, 2:25].plot(marker='*', figsize=(20, 8)).set_title('Daily Total Confirmed - Major 2nd Areas', fontdict={'fontsize': 22})
p_d = ts_d.reindex(_rank, axis=1)
p_d.iloc[:, :1].plot(marker='*', figsize=(20, 8)).set_title('Daily Total Death - Top World Region', fontdict={'fontsize': 22})
p_d.iloc[:, 2:25].plot(marker='*', figsize=(20, 8)).set_title('Daily Total Death - Major 2nd Areas', fontdict={'fontsize': 22})
p_r = ts_rec.reindex(_rank, axis=1)
p_r.iloc[:, :1].plot(marker='*', figsize=(20, 8)).set_title('Daily Total Recoverd - Top World Region', fontdict={'fontsize': 22})
p_r.iloc[:, 2:25].plot(marker='*', figsize=(20, 8)).set_title('Daily Total Recoverd - Major 2nd Areas', fontdict={'fontsize': 22})
# Create data for R script
ts_r=ts.reset_index()
ts_r=ts_r.rename(columns = {'index':'date'})
ts_r['date']=pd.to_datetime(ts_r['date'] ,errors ='coerce')
ts_r.to_csv(r'/home/notebookuser/notebooks/covid19/korean/ts_r.csv')
#!pip install rpy2
import rpy2
%load_ext rpy2.ipython
%%R
#install.packages('pracma')
#install.packages('Metrics')
#install.packages('readr')
#install.packages('reshape')
%%R
# Keep R and Python on the same clock for date arithmetic.
Sys.setenv(TZ='GMT')
Sys.timezone()
%%R
require(pracma)
require(Metrics)
require(readr)
Sys.setenv(TZ='GMT')
# Wide time series: column 1 is the date, every other column one region.
# NOTE(review): reads ts_2_r.csv but the Python cell above wrote
# korean/ts_r.csv - confirm the file actually consumed here.
all<- read_csv("/home/notebookuser/notebooks/covid19/ts_2_r.csv")
all$X1<-NULL
date<-all[,1]
# Extend the date column by one day: the filter predicts one step ahead.
date[nrow(date) + 1,1] <-all[nrow(all),1]+1
pred_all<-NULL
# NOTE: by R precedence 2:ncol(all)-1 is (2:ncol(all))-1 == 1:(ncol-1);
# all[n+1] below therefore walks the region columns 2..ncol(all).
for (n in 2:ncol(all)-1) {
# Observation series for this region, padded one step past the data.
Y<-ts(data = all[n+1], start = 1, end =nrow(all)+1)
sig_w<-0.01
w<-sig_w*randn(1,100) # acceleration which denotes the fluctuation (Q/R) rnorm(100, mean = 0, sd = 1)
sig_v<-0.01
v<-sig_v*randn(1,100)
# Constant-velocity state model: state = (level, trend), time step t=0.45.
t<-0.45
phi<-matrix(c(1,0,t,1),2,2)
gama<-matrix(c(0.5*t^2,t),2,1)
H<-matrix(c(1,0),1,2)
#Kalman
# Initial state x0_0 = (0,0), initial covariance p0_0 = identity.
x0_0<-p0_0<-matrix(c(0,0),2,1)
p0_0<-matrix(c(1,0,0,1),2,2)
Q<-0.01
R<-0.01
X<-NULL
X2<-NULL
pred<-NULL
# Predict/update recursion; intermediate matrices live as variables named
# p<i>_<j>, k<i>, x<i>_<j>, E<i>, created and read via assign()/get().
for (i in 0:nrow(all)) {
namp <-paste("p", i+1,"_",i, sep = "")
assign(namp, phi%*%(get(paste("p", i,"_",i, sep = "")))%*%t(phi)+gama%*%Q%*%t(gama))
namk <- paste("k", i+1, sep = "")
assign(namk,get(paste("p", i+1,"_",i, sep = ""))%*%t(H)%*%(1/(H%*%get(paste("p", i+1,"_",i, sep = ""))%*%t(H)+R)))
namx <- paste("x", i+1,"_",i, sep = "")
assign(namx,phi%*%get(paste("x", i,"_",i, sep = "")))
namE <- paste("E", i+1, sep = "")
assign(namE,Y[i+1]-H%*%get(paste("x", i+1,"_",i, sep = "")))
namx2 <- paste("x", i+1,"_",i+1, sep = "")
assign(namx2,get(paste("x", i+1,"_",i, sep = ""))+get(paste("k", i+1, sep = ""))%*%get(paste("E", i+1, sep = "")))
namp2 <- paste("p", i+1,"_",i+1, sep = "")
assign(namp2,(p0_0-get(paste("k", i+1, sep = ""))%*%H)%*%get(paste("p", i+1,"_",i, sep = "")))
# Collect the one-step-ahead level (X) and trend (X2).
X<-rbind(X,get(paste("x", i+1,"_",i,sep = ""))[1])
X2<-rbind(X2,get(paste("x", i+1,"_",i,sep = ""))[2])
# Drop matrices older than two steps to bound the environment's size.
if(i>2){
remove(list=(paste("p", i-1,"_",i-2, sep = "")))
remove(list=(paste("k", i-1, sep = "")))
remove(list=(paste("E", i-1, sep = "")))
remove(list=(paste("p", i-2,"_",i-2, sep = "")))
remove(list=(paste("x", i-1,"_",i-2, sep = "")))
remove(list=(paste("x", i-2,"_",i-2, sep = "")))}
}
pred<-NULL
# Per-region frame: observed Y, predicted level X, rounded trend X2.
pred<-cbind(Y,X,round(X2,4))
pred<-as.data.frame(pred)
pred$region<-colnames(all[,n+1])
pred$date<-date$date
# Day-over-day percentage change of the actual and the predicted series.
pred$actual<-rbind(0,(cbind(pred[2:nrow(pred),1])/pred[1:nrow(pred)-1,1]-1)*100)
pred$predict<-rbind(0,(cbind(pred[2:nrow(pred),2])/pred[1:nrow(pred)-1,2]-1)*100)
pred$pred_rate<-(pred$X/pred$Y-1)*100
pred$X2_change<-rbind(0,(cbind(pred[2:nrow(pred),3]-pred[1:nrow(pred)-1,3])))
pred_all<-rbind(pred_all,pred)
}
pred_all<-cbind(pred_all[,4:5],pred_all[,1:3])
names(pred_all)[5]<-"X2"
pred_all=pred_all[with( pred_all, order(region, date)), ]
pred_all<-pred_all[,3:5]
# Hand the ordered predictions back to Python as `p`.
p=%R pred_all
############ Merge R output due to package problem
# Re-shape the death time series to long format and align it row-for-row with
# the R Kalman output `p` (pulled back from R as `pred_all`).
t = ts_d
t = t.stack().reset_index(name='confirmed')
t.columns = ['date', 'region', 'confirmed']
t['date'] = pd.to_datetime(t['date'], errors='coerce')
t = t.sort_values(['region', 'date'])
temp = t.iloc[:, :3]
temp = temp.reset_index(drop=True)
# Append one extra (future) day at every region boundary so `temp` matches the
# R output, which predicts one day ahead per region.
# NOTE: the loop deliberately runs one step past the last original row; rows
# appended earlier keep iloc[i] in range (assuming at least two regions, which
# holds for this dataset) and the final region also gets its extra day.
for i in range(1, len(t) + 1):
    # Fix: compare region strings by value (!=), not object identity (is not).
    if temp.iloc[i, 1] != temp.iloc[i - 1, 1]:
        temp.loc[len(temp) + 1] = [temp.iloc[i - 1, 0] + pd.DateOffset(1), temp.iloc[i - 1, 1], 0]
temp = temp.sort_values(['region', 'date'])
temp = temp.reset_index(drop=True)
# NOTE(review): these assignments align on the index - assumes `p` carries a
# compatible RangeIndex after the R round-trip; confirm.
temp['Y'] = p['Y']
temp['X'] = p['X']
temp['X2'] = p['X2']
#!pip install pyweatherbit
#from weatherbit.api import Api
#import json
#import pandas as pd
#from pandas.io.json import json_normalize
#api_key = "26141d374d8e49d0a2e1f1254428ce8e"
#api_key ="f206579c74644c4b96a2423cb56a1687"
# #api_key ="81a581ac823849f38427fb5081cb8df8"
# #api_key ="be7aaf4ff7184347aa1ebc2a2db514fe"
#api = Api(api_key)
#api.set_granularity('daily')
# # Set the granularity of the API - Options: ['daily','hourly','3hourly']
# # Will only affect forecast requests.
#api.get_forecast(lat='Lat', lon='Lon')
# ################## already done since API is limited to 500 call per day
# w=pd.DataFrame(columns=['date','region','min','max'])
# for i in range (61,len(confirmed)):
# start_date=pd.to_datetime('2020-01-22')
# for j in range (4,confirmed.shape[1]-2):
# jas=api.get_history(lat=confirmed.iloc[i,2], lon=confirmed.iloc[i,3], start_date=start_date.strftime('%Y-%m-%d'),end_date=(start_date+ pd.DateOffset(days=1)).strftime('%Y-%m-%d')).json
# # j=json_normalize(j)
# # j=j['data']
# # max_temp=json_normalize(j['data'])['max_temp'].values[0] # max
# # min_temp=json_normalize(j['data'])['min_temp'].values[0]
# try:
# w=w.append({'date':confirmed.columns[j],'region':confirmed.iloc[i,confirmed.shape[1]-1] ,'min':json_normalize(jas['data'])['min_temp'].values[0],'max':json_normalize(jas['data'])['max_temp'].values[0]}, ignore_index=True)
# except Exception:
# w=w.append({'date':confirmed.columns[j],'region':confirmed.iloc[i,confirmed.shape[1]-1] ,'min':None,'max':None}, ignore_index=True)
# start_date=start_date+ pd.DateOffset(days=1)
# ################## Update Recent Day Weather
# w_update=pd.DataFrame(columns=['date','region','min','max'])
# for i in range (28,len(confirmed)):
# start_date=pd.to_datetime('2020-02-17')
# for j in range (confirmed.shape[1]-4,confirmed.shape[1]-2):
# jas=api.get_history(lat=confirmed.iloc[i,2], lon=confirmed.iloc[i,3], start_date=start_date.strftime('%Y-%m-%d'),end_date=(start_date+ pd.DateOffset(days=1)).strftime('%Y-%m-%d')).json
# try:
# w_update=w_update.append({'date':confirmed.columns[j],'region':confirmed.iloc[i,confirmed.shape[1]-1] ,'min':json_normalize(jas['data'])['min_temp'].values[0],'max':json_normalize(jas['data'])['max_temp'].values[0]}, ignore_index=True)
# except Exception:
# w_update=w_update.append({'date':confirmed.columns[j],'region':confirmed.iloc[i,confirmed.shape[1]-1] ,'min':None,'max':None}, ignore_index=True)
# start_date=start_date+ pd.DateOffset(days=1)
# w_update.to_csv(r'w_update.csv')
# ################## Forecast Weather With API - Already Done
# #forecast = api.get_forecast(lat=lat, lon=lon)
# w_forecast=pd.DataFrame(columns=['datetime','min_temp','max_temp','region'])
# for i in range (0,len(confirmed)):
# jas=api.get_forecast(lat=confirmed.iloc[i,2], lon=confirmed.iloc[i,3]).json
# jas=json_normalize(jas['data'])[['datetime','min_temp','max_temp']]
# try:
# w_forecast_temp=jas
# w_forecast_temp['region']=confirmed.iloc[i,confirmed.shape[1]-1]
# except Exception:
# w_forecast_temp=pd.DataFrame(columns=['datetime','min_temp','max_temp','region'])
# w_forecast=w_forecast.append(w_forecast_temp)
# w_forecast=w_forecast[['datetime','region','min_temp','max_temp']]
# w_forecast.columns = ['date', 'region', 'min', 'max']
# w_forecast['date']=pd.to_datetime(w_forecast['date'],format='%Y-%m-%d')
# Load the pre-fetched weather history and forecast (day-first date strings).
w = pd.read_csv('/home/notebookuser/notebooks/covid19/w.csv', sep=',', encoding='latin1')
w['date'] = pd.to_datetime(w['date'], format='%d/%m/%Y')
#w['date']=pd.to_datetime(w['date'],errors ='coerce')
w_forecast = pd.read_csv('/home/notebookuser/notebooks/covid19/w_forecast.csv', sep=',', encoding='latin1')
w_forecast['date'] = pd.to_datetime(w_forecast['date'], format='%d/%m/%Y')
# Long-format confirmed series: one row per (region, date).
t = ts
t = t.stack().reset_index(name='confirmed')
t.columns = ['date', 'region', 'confirmed']
t['date'] = pd.to_datetime(t['date'], errors='coerce')
t = t.sort_values(['region', 'date'])
# Add 1 Future day for prediction
t = t.reset_index(drop=True)
# NOTE: the loop runs one step past the last original row on purpose; rows
# appended earlier in the loop keep iloc[i] in range and give the final
# region its future day too (assumes at least two regions).
for i in range(1, len(t) + 1):
    # Fix: string comparison by value (!=), not object identity (is not).
    if t.iloc[i, 1] != t.iloc[i - 1, 1]:
        t.loc[len(t) + 1] = [t.iloc[i - 1, 0] + pd.DateOffset(1), t.iloc[i - 1, 1], 0]
t = t.sort_values(['region', 'date'])
t = t.reset_index(drop=True)
# Feature columns (filled by the next loop); chained assignment creates them
# left-to-right, so the positional layout is:
# 3/4/5 = 1/3/7-day change, 6/7/8 = 1/3/7-day change rate, 9 = last_day.
t['1_day_change'] = t['3_day_change'] = t['7_day_change'] = t['1_day_change_rate'] = t['3_day_change_rate'] = t['7_day_change_rate'] = t['last_day'] = 0
#
### JOAO - Fix - ipykernel_launcher.py:5: RuntimeWarning: divide by zero encountered in double_scalars
# Derive lagged features per region. Column positions: 2=confirmed,
# 3/4/5 = 1/3/7-day absolute change, 6/7/8 = 1/3/7-day change rate,
# 9 = previous day's confirmed. The +1 terms keep the rate denominators
# away from zero (the divide-by-zero fix referenced above).
for i in range(1, len(t)):
    # Fix: == (value equality) instead of `is` (object identity) on strings.
    # The guard checks that the lagged row still belongs to the same region.
    # For small i the negative lag wraps to the frame's tail, which is a
    # different region (frame is sorted by region), so the guard stays False.
    if t.iloc[i, 1] == t.iloc[i - 2, 1]:
        t.iloc[i, 3] = t.iloc[i - 1, 2] - t.iloc[i - 2, 2]
        t.iloc[i, 6] = ((t.iloc[i - 1, 2] + 1) / (t.iloc[i - 2, 2] - 1 + 1)) * 100
        t.iloc[i, 9] = t.iloc[i - 1, 2]
    if t.iloc[i, 1] == t.iloc[i - 4, 1]:
        t.iloc[i, 4] = t.iloc[i - 1, 2] - t.iloc[i - 4, 2]
        t.iloc[i, 7] = ((t.iloc[i - 1, 2] + 1) / (t.iloc[i - 4, 2] - 1 + 1)) * 100
    if t.iloc[i, 1] == t.iloc[i - 8, 1]:
        t.iloc[i, 5] = t.iloc[i - 1, 2] - t.iloc[i - 8, 2]
        t.iloc[i, 8] = ((t.iloc[i - 1, 2] + 1) / (t.iloc[i - 8, 2] - 1 + 1)) * 100
t = t.fillna(0)
# Join the Kalman one-step-ahead prediction produced by the R section.
t = t.merge(temp[['date', 'region', 'X']], how='left', on=['date', 'region'])
t = t.rename(columns={'X': 'kalman_prediction'})
t = t.replace([np.inf, -np.inf], 0)
### Joao - Fix NaN Kalman_Filter
#t['kalman_prediction']=round(t['kalman_prediction'])
t['kalman_prediction'] = np.nan_to_num(t['kalman_prediction'])
t['kalman_prediction'] = round(t['kalman_prediction'], 2)
#
#
# Attach population (string-formatted like " 1,402,000 ") and clean it to int.
train = t.merge(confirmed[['region', ' Population ']], how='left', on='region')
train = train.rename(columns={' Population ': 'population'})
_pop = train['population'].str.replace(r" ", '').str.replace(r",", '')
train['population'] = _pop.fillna(1).astype('int32')
### JOAO - Fix - ipykernel_launcher.py:5: RuntimeWarning: divide by zero encountered in double_scalars
# train['infected_rate']=train['last_day']/train['population']*10000
# The +1 offsets keep the ratio defined when population/last_day are zero.
train['infected_rate'] = ((train['last_day'] + 1) / ((train['population'] + 1) * 10000))
#
# Join daily weather, then forward-fill gaps within each region.
train = train.merge(w, how='left', on=['date', 'region'])
train = train.sort_values(['region', 'date'])
### fill missing weather
# Columns 13/14 are 'min'/'max' temperature added by the weather merge;
# copy the previous row's values when the join left NaN and the previous
# row belongs to the same region.
for i in range(0, len(train)):
    if np.isnan(train.iloc[i, 13]):
        # Fix: value equality (==) instead of identity (is) for region strings.
        if train.iloc[i, 1] == train.iloc[i - 1, 1]:
            train.iloc[i, 13] = train.iloc[i - 1, 13]
            train.iloc[i, 14] = train.iloc[i - 1, 14]
### JOAO - ERROR - ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
# Select region
# Score the Kalman prediction (col 10) against confirmed (col 2) per region,
# then plot the selected one. (Unused `place=0` removed.)
region = 'China_Hubei'
evaluation = pd.DataFrame(columns=['region', 'mse', 'rmse', 'mae'])
for i in range(1, len(t)):
    # Fix: string comparison by value; `is not` relied on object identity.
    if t.iloc[i, 1] != t.iloc[i - 1, 1]:
        ex = np.array(t.iloc[i - len(ts):i, 10])
        pred = np.array(t.iloc[i - len(ts):i, 2])
        # Fix: DataFrame.append was removed in pandas 2.0 - use pd.concat.
        row = {'region': t.iloc[i - 1, 1], 'mse': np.power((ex - pred), 2).mean(),
               'rmse': sqrt(mean_squared_error(ex, pred)), 'mae': (abs(ex - pred)).mean()}
        evaluation = pd.concat([evaluation, pd.DataFrame([row])], ignore_index=True)
p = t[t['region'] == region][['date', 'region', 'confirmed', 'kalman_prediction']]
p = p.rename(columns={'confirmed': 'recoverd'})
# Blank the final (future placeholder) day so the plot skips the dummy 0.
p.iloc[len(p) - 1, 2] = None
p = p.set_index(['date'])
p.iloc[:, 1:].plot(marker='o', figsize=(16, 8)).set_title('Kalman Prediction - Select Region to Change - {}'.format(p.iloc[0, 0]))
print(evaluation[evaluation['region'] == p.iloc[0, 0]])
### JOAO - ERROR - ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
# Select region
# Same per-region scoring as above, plotted for Shanghai.
region = 'China_Shanghai'
evaluation = pd.DataFrame(columns=['region', 'mse', 'rmse', 'mae'])
for i in range(1, len(t)):
    # Fix: string comparison by value; `is not` relied on object identity.
    if t.iloc[i, 1] != t.iloc[i - 1, 1]:
        ex = np.array(t.iloc[i - len(ts):i, 10])
        pred = np.array(t.iloc[i - len(ts):i, 2])
        # Fix: DataFrame.append was removed in pandas 2.0 - use pd.concat.
        row = {'region': t.iloc[i - 1, 1], 'mse': np.power((ex - pred), 2).mean(),
               'rmse': sqrt(mean_squared_error(ex, pred)), 'mae': (abs(ex - pred)).mean()}
        evaluation = pd.concat([evaluation, pd.DataFrame([row])], ignore_index=True)
p = t[t['region'] == region][['date', 'region', 'confirmed', 'kalman_prediction']]
p = p.rename(columns={'confirmed': 'recoverd'})
p.iloc[len(p) - 1, 2] = None
p = p.set_index(['date'])
p.iloc[:, 1:].plot(marker='o', figsize=(16, 8)).set_title('Kalman Prediction - Select Region to Change - {}'.format(p.iloc[0, 0]))
print(evaluation[evaluation['region'] == p.iloc[0, 0]])
# Same per-region scoring as above, plotted for Italy.
region = 'Italy_nan'
evaluation = pd.DataFrame(columns=['region', 'mse', 'rmse', 'mae'])
for i in range(1, len(t)):
    # Fix: string comparison by value; `is not` relied on object identity.
    if t.iloc[i, 1] != t.iloc[i - 1, 1]:
        ex = np.array(t.iloc[i - len(ts):i, 10])
        pred = np.array(t.iloc[i - len(ts):i, 2])
        # Fix: DataFrame.append was removed in pandas 2.0 - use pd.concat.
        row = {'region': t.iloc[i - 1, 1], 'mse': np.power((ex - pred), 2).mean(),
               'rmse': sqrt(mean_squared_error(ex, pred)), 'mae': (abs(ex - pred)).mean()}
        evaluation = pd.concat([evaluation, pd.DataFrame([row])], ignore_index=True)
p = t[t['region'] == region][['date', 'region', 'confirmed', 'kalman_prediction']]
p = p.rename(columns={'confirmed': 'recoverd'})
p.iloc[len(p) - 1, 2] = None
p = p.set_index(['date'])
p.iloc[:, 1:].plot(marker='o', figsize=(16, 8)).set_title('Kalman Prediction - Select Region to Change - {}'.format(p.iloc[0, 0]))
print(evaluation[evaluation['region'] == p.iloc[0, 0]])
# Same per-region scoring as above, plotted for the United States.
region = 'United States_nan'
evaluation = pd.DataFrame(columns=['region', 'mse', 'rmse', 'mae'])
for i in range(1, len(t)):
    # Fix: string comparison by value; `is not` relied on object identity.
    if t.iloc[i, 1] != t.iloc[i - 1, 1]:
        ex = np.array(t.iloc[i - len(ts):i, 10])
        pred = np.array(t.iloc[i - len(ts):i, 2])
        # Fix: DataFrame.append was removed in pandas 2.0 - use pd.concat.
        row = {'region': t.iloc[i - 1, 1], 'mse': np.power((ex - pred), 2).mean(),
               'rmse': sqrt(mean_squared_error(ex, pred)), 'mae': (abs(ex - pred)).mean()}
        evaluation = pd.concat([evaluation, pd.DataFrame([row])], ignore_index=True)
p = t[t['region'] == region][['date', 'region', 'confirmed', 'kalman_prediction']]
p = p.rename(columns={'confirmed': 'recoverd'})
p.iloc[len(p) - 1, 2] = None
p = p.set_index(['date'])
p.iloc[:, 1:].plot(marker='o', figsize=(16, 8)).set_title('Kalman Prediction - Select Region to Change - {}'.format(p.iloc[0, 0]))
print(evaluation[evaluation['region'] == p.iloc[0, 0]])
# Same per-region scoring as above, plotted for the United Kingdom.
region = 'United Kingdom_nan'
evaluation = pd.DataFrame(columns=['region', 'mse', 'rmse', 'mae'])
for i in range(1, len(t)):
    # Fix: string comparison by value; `is not` relied on object identity.
    if t.iloc[i, 1] != t.iloc[i - 1, 1]:
        ex = np.array(t.iloc[i - len(ts):i, 10])
        pred = np.array(t.iloc[i - len(ts):i, 2])
        # Fix: DataFrame.append was removed in pandas 2.0 - use pd.concat.
        row = {'region': t.iloc[i - 1, 1], 'mse': np.power((ex - pred), 2).mean(),
               'rmse': sqrt(mean_squared_error(ex, pred)), 'mae': (abs(ex - pred)).mean()}
        evaluation = pd.concat([evaluation, pd.DataFrame([row])], ignore_index=True)
p = t[t['region'] == region][['date', 'region', 'confirmed', 'kalman_prediction']]
p = p.rename(columns={'confirmed': 'recoverd'})
p.iloc[len(p) - 1, 2] = None
p = p.set_index(['date'])
p.iloc[:, 1:].plot(marker='o', figsize=(16, 8)).set_title('Kalman Prediction - Select Region to Change - {}'.format(p.iloc[0, 0]))
print(evaluation[evaluation['region'] == p.iloc[0, 0]])
#!pip install h2o
# H2O machine-learning stack used for the random-forest model below.
import h2o
from h2o.estimators import H2ORandomForestEstimator
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
from h2o.grid.grid_search import H2OGridSearch
# Start (or attach to) a local H2O cluster with a 7 GB minimum heap.
h2o.init(min_mem_size='7G')
import numpy as np
from sklearn.linear_model import LinearRegression
##
#### My List of Countries and Regions to train and represent data
# Used only with DataFrame.isin() below; the duplicate entries in this list
# (several China provinces, Italy_nan, etc. appear twice) are harmless for
# membership filtering - isin ignores repeats.
my_train_list=[
'Australia_New South Wales', 'Australia_Queensland',
'Australia_South Australia', 'Australia_Victoria', 'Belgium_nan',
'Cambodia_nan', 'Canada_British Columbia',
'Canada_Ontario',
'China_Anhui', 'China_Beijing',
'China_Chongqing', 'China_Fujian', 'China_Gansu',
'China_Guangdong', 'China_Guangxi', 'China_Guizhou',
'China_Hainan', 'China_Hebei', 'China_Heilongjiang', 'China_Henan',
'China_Hubei', 'China_Hunan', 'China_Inner Mongolia',
'China_Jiangsu', 'China_Jiangxi', 'China_Jilin', 'China_Liaoning',
'China_Ningxia', 'China_Qinghai', 'China_Shaanxi',
'China_Shandong', 'China_Shanghai', 'China_Shanxi',
'China_Sichuan', 'China_Tianjin', 'China_Tibet', 'China_Xinjiang',
'China_Yunnan', 'China_Zhejiang', 'Egypt_nan', 'Finland_nan',
'France_nan', 'Germany_nan',
'China_Hong Kong',
'India_nan',
'Italy_nan', 'Japan_nan',
'China_Macau',
'Malaysia_nan',
'Nepal_nan',
'Philippines_nan', 'Russia_nan', 'Singapore_nan',
'Korea, South_nan', 'Spain_nan', 'Sri Lanka_nan', 'Sweden_nan',
'Taiwan*_nan', 'Thailand_nan',
'United Arab Emirates_nan',
'Vietnam_nan',
'China_Hubei',
'China_Anhui', 'China_Beijing',
'China_Chongqing', 'China_Fujian', 'China_Gansu',
'China_Guangdong', 'China_Guangxi', 'China_Guizhou',
'China_Hainan', 'China_Hebei', 'China_Heilongjiang','China_Henan','China_Hunan',
'China_Jiangsu', 'China_Jiangxi', 'China_Jilin', 'China_Liaoning',
'China_Ningxia', 'China_Qinghai', 'China_Shaanxi',
'China_Shandong', 'China_Shanghai', 'China_Shanxi',
'China_Sichuan', 'China_Xinjiang',
'China_Yunnan', 'China_Zhejiang',
### JOAO - LIST of Countries - Start here
'Andorra_nan', 'Morocco_nan',
'Italy_nan',
'United States_nan',
'Portugal_nan',
'Spain_nan',
'Netherlands_nan',
'France_nan',
'Belgium_nan', 'Poland_nan',
'India_nan',
'United Kingdom_nan',
'Switzerland_nan',
'Germany_nan',
'Japan_nan'
### JOAO - LIST of Countries - Finish here
]
train = train.fillna(0)
###train_df=train[train['date']<'2020-02-17']
###boots=train_df[train_df['date']>='2020-02-14'] # some bootstrap to give more weight for recent days
###train_df=train_df.append([boots[boots['date']>='2020-02-14']]*1000,ignore_index=True)
###train_df_hubei=train_df[train_df['region']=='China_Hubei']
###test=train[train['date']>='2020-02-17']
###test=test[test['date']<'2020-02-19']
###
### Joao - Training progression - When growth happened 2020/03/18 to 2020/03/21
train_df = train[train['date'] < '2020-03-21']
# Fix: the bootstrap previously selected dates >= '2020-03-27' from a frame
# already restricted to < '2020-03-21', so it was always empty and the
# re-weighting was a no-op. Use the growth-window start (2020-03-18) that the
# comment above and the inner filter intended.
boots = train_df[train_df['date'] >= '2020-03-18']  # some bootstrap to give more weight for recent days
# Fix: DataFrame.append was removed in pandas 2.0 - replicate via pd.concat.
train_df = pd.concat([train_df] + [boots[boots['date'] >= '2020-03-18']] * 1000, ignore_index=True)
### Train progression of the Virus ### In Country list or Spain only
region_to_train = my_train_list
train_df_v2 = train_df[train_df['region'].isin(region_to_train)]  # =='Spain_nan'] #
test = train[train['date'] >= '2020-03-21']
test = test[test['date'] < '2020-03-30']
# Feature set shared with the H2O model below ('region' excluded here).
x_col=[#'region',
'1_day_change', '3_day_change','7_day_change',
'1_day_change_rate',
'3_day_change_rate',
'7_day_change_rate',
'last_day', 'kalman_prediction','infected_rate', 'min', 'max'
]
# Ordinary-least-squares baseline on the bootstrapped training frame.
x = train_df[x_col]
y = train_df['confirmed']
reg = LinearRegression().fit(x, y)
pred2 = pd.DataFrame(reg.predict(test[x_col]))
pred2 = round(pred2)
pred2['confirmed'] = test['confirmed'].values
pred2['date'] = test['date'].values
pred2['region'] = test['region'].values
pred2.iloc[:55]
pred2.iloc[100:150]
# Convert the pandas frames to H2O frames for model training.
train_h20 = h2o.H2OFrame(train_df)
###train_h20_hubei = h2o.H2OFrame(train_df_hubei) # different model for Hubei
#
### Joao - Italian Model
train_h20_v2 = h2o.H2OFrame(train_df_v2) # different model for V2 region ### Spain This time
# Unlike x_col for the linear model, 'region' is included as a feature here.
training_columns = ['region','1_day_change', '3_day_change', '7_day_change', '1_day_change_rate', '3_day_change_rate',
'7_day_change_rate', 'last_day', 'kalman_prediction','infected_rate', 'min', 'max'
]
# Output parameter train against input parameters
response_column = 'confirmed'
# model = H2ORandomForestEstimator(ntrees=300, max_depth=12)
# model.train(x=training_columns, y=response_column, training_frame=train_h20)
###model_hubei = H2ORandomForestEstimator(ntrees=300, max_depth=12)
###model_hubei.train(x=training_columns, y=response_column, training_frame=train_h20_hubei)
### Joao - Italian Model
# Random forest trained only on the selected-country subset (train_df_v2).
model_v2 = H2ORandomForestEstimator(ntrees=500, max_depth=17)
model_v2.train(x=training_columns, y=response_column, training_frame=train_h20_v2)
test_h20 = h2o.H2OFrame(test)
#test_h20_hubei = h2o.H2OFrame(test_hubei)
#model_hubei.varimp(True).iloc[:,:] # Feature importance for Hubei Model RF
### Joao - Model V2
model_v2.varimp(True).iloc[:,:] # Feature importance for Hubei Model RF
# # Model performance
# performance = model_hubei.model_performance(test_data=test_h20_hubei)
# # Model Create Predictions
# pred=model_hubei.predict(test_h20_hubei);pred=pred.as_data_frame(); pred=round(pred)
# #pred['daily_outcome']=test['daily_outcome'].values
# pred['confirmed']=test_hubei['confirmed'].values
# pred['date']=test_hubei['date'].values
# pred['region']=test_hubei['region'].values
# pred2=model.predict(test_h20);pred2=pred2.as_data_frame(); pred2=round(pred2)
# pred2['confirmed']=test['confirmed'].values
# pred2['date']=test['date'].values
# pred2['region']=test['region'].values
# pred=pred.append(pred2)
## Joao - Model Predictions - Country_nan _v2
# Score the V2 random forest on the hold-out window.
performance = model_v2.model_performance(test_data=test_h20)
# # Model Create Predictions
# Predict, pull back into pandas, round, then attach identifying columns.
pred=model_v2.predict(test_h20);pred=pred.as_data_frame(); pred=round(pred)
# #pred['daily_outcome']=test['daily_outcome'].values
pred['confirmed']=test['confirmed'].values
pred['date']=test['date'].values
pred['region']=test['region'].values
from string import ascii_letters
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white")
# Compute the correlation matrix over the numeric feature columns.
corr = train.iloc[:, 2:].corr()
# Generate a mask for the upper triangle.
# Fix: np.bool was deprecated in NumPy 1.20 and removed in 1.24; the builtin
# bool is the correct dtype here.
mask = np.triu(np.ones_like(corr, dtype=bool))
# Set up the matplotlib figure
f, ax = plt.subplots(figsize=(11, 9))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(220, 10, as_cmap=True)
# Draw the heatmap with the mask and correct aspect ratio
sns.heatmap(corr, mask=mask, cmap=cmap, vmax=.9, center=0,
            square=True, linewidths=.5, cbar_kws={"shrink": .5})
print ('Correlation Matrix')
print('Correlation To Confirmed')
print (corr.confirmed)
import matplotlib.pyplot as plt
# Min/max temperature history for the two focus regions.
p = train[['date', 'region', 'min', 'max']].set_index('date')
p = p[p['region'] == 'China_Hubei']
p.iloc[:, :].plot(marker='*', figsize=(12, 4), color=['#19303f', '#cccc00']).set_title('Daily Min/Max Temperature - Hubei', fontdict={'fontsize': 20})
## JOAO - Temp. Teast Italy - Data Supply finishes 13/03/2020
p = train[['date', 'region', 'min', 'max']].set_index('date')
p = p[p['region'] == 'Italy_nan']
p.iloc[:, :].plot(marker='*', figsize=(12, 4), color=['#19303f', '#cccc00']).set_title('Daily Min/Max Temperature - Italy', fontdict={'fontsize': 20})
#
#
# Average temperature of the most infected regions.
avg_temp = (train[['region', 'confirmed', 'min', 'max']]  # from 17-02-20 to 16-03-2020
            .groupby(by='region')
            .mean()
            .sort_values('confirmed', ascending=False))
print( 'Most infected Areas Avg Temperature')
print(avg_temp.iloc[:100, 1:])
%%R
#install.packages('reshape')
%%R
require(pracma)
require(Metrics)
require(readr)
library(reshape)
# Wide time series exported by the Python cell above (korean/ts_r.csv).
all<- read_csv("/home/notebookuser/notebooks/covid19/korean/ts_r.csv")
all$X1<-NULL
### JOAO
#### for (i in 1:30) { # Set i days prediction
#####for (i in 1:45) { # Set i days prediction
# Iterated forecast: run the per-region Kalman filter, append the predicted
# next day to the frame, and repeat 75 times (75 days ahead).
# NOTE(review): the inner Kalman loop reuses the variable name `i`; R's for
# still walks the outer sequence 1:75 regardless, but the shadowing is
# fragile - confirm before refactoring.
for (i in 1:75) { # Set i days prediction
# After the first pass, continue from the previously extended frame.
if( i>1) {all<-all_new}
date<-all[,1]
date[nrow(date) + 1,1] <-all[nrow(all),1]+1
pred_all<-NULL
# By R precedence 2:ncol(all)-1 == 1:(ncol-1); all[n+1] walks columns 2..ncol.
for (n in 2:ncol(all)-1) {
Y<-ts(data = all[n+1], start = 1, end =nrow(all)+1)
sig_w<-0.01
w<-sig_w*randn(1,100) # acceleration which denotes the fluctuation (Q/R) rnorm(100, mean = 0, sd = 1)
sig_v<-0.01
v<-sig_v*randn(1,100)
# Constant-velocity state model: state = (level, trend), time step t=0.45.
t<-0.45
phi<-matrix(c(1,0,t,1),2,2)
gama<-matrix(c(0.5*t^2,t),2,1)
H<-matrix(c(1,0),1,2)
#Kalman
x0_0<-p0_0<-matrix(c(0,0),2,1)
p0_0<-matrix(c(1,0,0,1),2,2)
Q<-0.01
R<-0.01
X<-NULL
X2<-NULL
pred<-NULL
# Predict/update recursion with assign()/get()-named intermediates
# (p<i>_<j>, k<i>, x<i>_<j>, E<i>), as in the earlier R section.
for (i in 0:nrow(all)) {
namp <-paste("p", i+1,"_",i, sep = "")
assign(namp, phi%*%(get(paste("p", i,"_",i, sep = "")))%*%t(phi)+gama%*%Q%*%t(gama))
namk <- paste("k", i+1, sep = "")
assign(namk,get(paste("p", i+1,"_",i, sep = ""))%*%t(H)%*%(1/(H%*%get(paste("p", i+1,"_",i, sep = ""))%*%t(H)+R)))
namx <- paste("x", i+1,"_",i, sep = "")
assign(namx,phi%*%get(paste("x", i,"_",i, sep = "")))
namE <- paste("E", i+1, sep = "")
assign(namE,Y[i+1]-H%*%get(paste("x", i+1,"_",i, sep = "")))
namx2 <- paste("x", i+1,"_",i+1, sep = "")
assign(namx2,get(paste("x", i+1,"_",i, sep = ""))+get(paste("k", i+1, sep = ""))%*%get(paste("E", i+1, sep = "")))
namp2 <- paste("p", i+1,"_",i+1, sep = "")
assign(namp2,(p0_0-get(paste("k", i+1, sep = ""))%*%H)%*%get(paste("p", i+1,"_",i, sep = "")))
X<-rbind(X,get(paste("x", i+1,"_",i,sep = ""))[1])
X2<-rbind(X2,get(paste("x", i+1,"_",i,sep = ""))[2])
# Free matrices older than two steps to bound memory.
if(i>2){
remove(list=(paste("p", i-1,"_",i-2, sep = "")))
remove(list=(paste("k", i-1, sep = "")))
remove(list=(paste("E", i-1, sep = "")))
remove(list=(paste("p", i-2,"_",i-2, sep = "")))
remove(list=(paste("x", i-1,"_",i-2, sep = "")))
remove(list=(paste("x", i-2,"_",i-2, sep = "")))}
}
pred<-NULL
pred<-cbind(Y,X,round(X2,4))
pred<-as.data.frame(pred)
pred$region<-colnames(all[,n+1])
pred$date<-date$date
pred$actual<-rbind(0,(cbind(pred[2:nrow(pred),1])/pred[1:nrow(pred)-1,1]-1)*100)
pred$predict<-rbind(0,(cbind(pred[2:nrow(pred),2])/pred[1:nrow(pred)-1,2]-1)*100)
pred$pred_rate<-(pred$X/pred$Y-1)*100
pred$X2_change<-rbind(0,(cbind(pred[2:nrow(pred),3]-pred[1:nrow(pred)-1,3])))
pred_all<-rbind(pred_all,pred)
}
pred_all<-cbind(pred_all[,4:5],pred_all[,1:3])
names(pred_all)[5]<-"X2"
pred_all<-pred_all[,1:5]
pred_all_today=pred_all[with( pred_all, order(region, date)), ]
all_new=all
#all_new[nrow(all_new),1]<-all_new[nrow(all),1]+1
# Keep only the newly predicted day, pivot it wide (one column per region).
temp<-with(pred_all_today, pred_all_today[date == all[nrow(all),1]+1, ])
temp<-cbind(temp[,1:2],temp[,4])
temp2<-reshape(temp, direction = "wide", idvar = "date", timevar = "region")
# Inject mild multiplicative noise (0.9-1.05) into the predicted day.
rand_num<-runif(ncol(temp2)-1, 0.9, 1.05)
temp2[,2:ncol(temp2)]<-temp2[,2:ncol(temp2)]*rand_num
colnames(temp2)=colnames(all_new)
# Append the predicted day and clamp it to be non-decreasing (cumulative
# counts cannot fall below the previous day).
all_new<-rbind(all_new,temp2)
all_new[,2:ncol(all_new)]<-round(all_new[,2:ncol(all_new)])
for (i in 2:ncol(all_new)) {
all_new[nrow(all_new),i]=max(all_new[nrow(all_new)-1,i],all_new[nrow(all_new),i])}
}
# Pull the 75-day iterated forecast back from R into pandas.
all_new=%R all_new
# NOTE(review): unit='d' treats the R date column as day counts since the
# epoch (1970-01-01) - confirm that is what rpy2 returned here.
all_new['date']=pd.to_datetime(all_new['date'],unit='d')
# Select regions
# Column selection for the combined forecast plot: 'date' first, then the
# region keys to chart. NOTE(review): several entries appear twice (e.g. the
# China provinces, Belgium_nan, China_Hubei); selecting duplicate labels
# duplicates those columns, so they are drawn twice in the plot - confirm
# whether that is intended.
region=['date',
'Australia_New South Wales', 'Australia_Queensland',
'Australia_South Australia', 'Australia_Victoria', 'Belgium_nan',
'Cambodia_nan', 'Canada_British Columbia',
'Canada_Ontario',
'China_Anhui', 'China_Beijing',
'China_Chongqing', 'China_Fujian', 'China_Gansu',
'China_Guangdong', 'China_Guangxi', 'China_Guizhou',
'China_Hainan', 'China_Hebei', 'China_Heilongjiang', 'China_Henan',
'China_Hubei', 'China_Hunan', 'China_Inner Mongolia',
'China_Jiangsu', 'China_Jiangxi', 'China_Jilin', 'China_Liaoning',
'China_Ningxia', 'China_Qinghai', 'China_Shaanxi',
'China_Shandong', 'China_Shanghai', 'China_Shanxi',
'China_Sichuan', 'China_Tianjin', 'China_Tibet', 'China_Xinjiang',
'China_Yunnan', 'China_Zhejiang', 'Egypt_nan', 'Finland_nan',
'France_nan', 'Germany_nan',
'China_Hong Kong',
'India_nan',
'Italy_nan', 'Japan_nan',
'China_Macau',
'Malaysia_nan',
'Nepal_nan',
'Philippines_nan', 'Russia_nan', 'Singapore_nan',
'Korea, South_nan', 'Spain_nan', 'Sri Lanka_nan', 'Sweden_nan',
'Taiwan*_nan', 'Thailand_nan',
'United Arab Emirates_nan',
# 'Unites States_Boston, MA',
# 'Unites States_Chicago, IL', 'Unites States_Los Angeles, CA',
# 'Unites States_Madison, WI', 'Unites States_Orange, CA',
# 'Unites States_San Antonio, TX', 'Unites States_San Benito, CA',
# 'Unites States_San Diego County, CA',
# 'Unites States_Santa Clara, CA', 'Unites States_Seattle, WA',
# 'Unites States_Tempe, AZ',
'Vietnam_nan',
# ]
'China_Hubei',
'China_Anhui', 'China_Beijing',
'China_Chongqing', 'China_Fujian', 'China_Gansu',
'China_Guangdong', 'China_Guangxi', 'China_Guizhou',
'China_Hainan', 'China_Hebei', 'China_Heilongjiang','China_Henan','China_Hunan',
'China_Jiangsu', 'China_Jiangxi', 'China_Jilin', 'China_Liaoning',
'China_Ningxia', 'China_Qinghai', 'China_Shaanxi',
'China_Shandong', 'China_Shanghai', 'China_Shanxi',
'China_Sichuan', 'China_Xinjiang',
'China_Yunnan', 'China_Zhejiang',
# 'Hong Kong_Hong Kong','Others_Diamond Princess cruise ship'
### JOAO - LIST of Countries - Start here
'Andorra_nan', 'Morocco_nan',
# 'Italy_nan',
'United States_nan',
'Portugal_nan',
# 'Spain_nan',
'Netherlands_nan',
# 'France_nan',
'Belgium_nan', 'Poland_nan',
# 'India_nan',
'United Kingdom_nan',
'Switzerland_nan',
# 'Germany_nan',
### JOAO - LIST of Countries - Finish here
# 'Japan_nan'
]
# Combined forecast chart for every selected region, then Spain on its own.
p_kalman = all_new[region].set_index(['date'])
#p=all_new
#p.iloc[len(p)-1,2]=None
p_kalman.iloc[:, :].plot(marker='o', figsize=(24, 14)).set_title('Kalman Prediction')
#p_kalman2=all_new[['date','China_Hubei']]
p_kalman2 = all_new[['date', 'Spain_nan']].set_index(['date'])  ## Joao
p_kalman2.iloc[:, :].plot(marker='o', figsize=(24, 14)).set_title('Kalman Prediction - Select Country/Region to Change - {}'.format(p_kalman2.columns[0]))
### Joao - Dynamic print
#print(region[:])
# One chart per selected column; region[0] is 'date', so skip it.
for country_print in region[1:]:
    #print("here:"+country_print)
    p_kalman_rg = all_new[['date', country_print]].set_index(['date'])
    p_kalman_rg.iloc[:, :].plot(marker='o', figsize=(16, 8)).set_title('Kalman Prediction - Select Country/Region to Change - {}'.format(p_kalman_rg.columns[0]))
### Fixed single-country plots (legacy versions of the dynamic loop above).
# All titles share one template; the column name is interpolated per plot.
_title_fmt = 'Kalman Prediction - Select Country/Region to Change - {}'

p_kalman3 = all_new[['date', 'Italy_nan']].set_index(['date'])
p_kalman3.plot(marker='o', figsize=(16, 8)).set_title(_title_fmt.format(p_kalman3.columns[0]))

p_kalman4 = all_new[['date', 'United States_nan']].set_index(['date'])
p_kalman4.plot(marker='o', figsize=(16, 8)).set_title(_title_fmt.format(p_kalman4.columns[0]))

p_kalman5 = all_new[['date', 'Germany_nan']].set_index(['date'])
p_kalman5.plot(marker='o', figsize=(16, 8)).set_title(_title_fmt.format(p_kalman5.columns[0]))

p_kalman6 = all_new[['date', 'France_nan']].set_index(['date'])
p_kalman6.plot(marker='o', figsize=(16, 8)).set_title(_title_fmt.format(p_kalman6.columns[0]))

p_kalman7 = all_new[['date', 'Netherlands_nan']].set_index(['date'])
p_kalman7.plot(marker='o', figsize=(16, 8)).set_title(_title_fmt.format(p_kalman7.columns[0]))

p_kalman8 = all_new[['date', 'Portugal_nan']].set_index(['date'])
p_kalman8.plot(marker='o', figsize=(16, 8)).set_title(_title_fmt.format(p_kalman8.columns[0]))

p_kalman9 = all_new[['date', 'United Kingdom_nan']].set_index(['date'])
p_kalman9.plot(marker='o', figsize=(16, 8)).set_title(_title_fmt.format(p_kalman9.columns[0]))

p_kalman10 = all_new[['date', 'Poland_nan']].set_index(['date'])
p_kalman10.plot(marker='o', figsize=(16, 8)).set_title(_title_fmt.format(p_kalman10.columns[0]))
# Persist the raw data and the Kalman output, then reshape the predictions
# to long form: one (date, region, confirmed) row per region per day.
t.to_csv(r't_confirmed_global.csv')
all_new.to_csv(r'prediction_kalman_filter_global.csv')

t_iter = all_new.set_index(['date'])
t_iter = t_iter.stack().reset_index(name='confirmed')
t_iter.columns = ['date', 'region', 'confirmed']
t_iter['date'] = pd.to_datetime(t_iter['date'], errors='coerce')
t_iter = t_iter.sort_values(['region', 'date'])
t_iter = t_iter.reset_index(drop=True)

# Append a zero-confirmed sentinel row dated one day after each region's
# last observation.  Fixes from review:
#  * `is not` compared string *identity*, not equality -> use `!=`
#  * the loop ran to len(t_iter)+1 and indexed past the end of the frame;
#    the extra iteration was meant to emit the sentinel for the final
#    region, which the explicit `i == n` test now does safely.
n = len(t_iter)
for i in range(1, n + 1):
    if i == n or t_iter.iloc[i, 1] != t_iter.iloc[i - 1, 1]:
        t_iter.loc[len(t_iter) + 1] = [t_iter.iloc[i - 1, 0] + pd.DateOffset(1),
                                       t_iter.iloc[i - 1, 1], 0]
t_iter = t_iter.sort_values(['region', 'date'])
t_iter = t_iter.reset_index(drop=True)
### Day-over-day change features, computed row-by-row on the long frame.
# The +1 terms in the rate formulas are the original author's guard against
# "divide by zero encountered in double_scalars" RuntimeWarnings.
#
# Positional column layout after the assignment below:
#   0 date, 1 region, 2 confirmed, 3 1_day_change, 4 3_day_change,
#   5 7_day_change, 6 1_day_change_rate, 7 3_day_change_rate,
#   8 7_day_change_rate, 9 last_day
t_iter['1_day_change']=t_iter['3_day_change']=t_iter['7_day_change']=t_iter['1_day_change_rate']=t_iter['3_day_change_rate']=t_iter['7_day_change_rate']=t_iter['last_day']=0
# Fixes from review:
#  * `is` compared string *identity*, not equality -> use `==`
#  * for i < 8 the i-2/i-4/i-8 lookups wrapped around to the end of the
#    frame through negative iloc indices -> guard each offset explicitly
for i in range(1, len(t_iter)):
    if i >= 2 and t_iter.iloc[i, 1] == t_iter.iloc[i - 2, 1]:
        t_iter.iloc[i, 3] = t_iter.iloc[i - 1, 2] - t_iter.iloc[i - 2, 2]
        t_iter.iloc[i, 6] = ((t_iter.iloc[i - 1, 2] + 1) / (t_iter.iloc[i - 2, 2] - 1 + 1)) * 100
        t_iter.iloc[i, 9] = t_iter.iloc[i - 1, 2]
    if i >= 4 and t_iter.iloc[i, 1] == t_iter.iloc[i - 4, 1]:
        t_iter.iloc[i, 4] = t_iter.iloc[i - 1, 2] - t_iter.iloc[i - 4, 2]
        t_iter.iloc[i, 7] = ((t_iter.iloc[i - 1, 2] + 1) / (t_iter.iloc[i - 4, 2] - 1 + 1)) * 100
    if i >= 8 and t_iter.iloc[i, 1] == t_iter.iloc[i - 8, 1]:
        t_iter.iloc[i, 5] = t_iter.iloc[i - 1, 2] - t_iter.iloc[i - 8, 2]
        t_iter.iloc[i, 8] = ((t_iter.iloc[i - 1, 2] + 1) / (t_iter.iloc[i - 8, 2] - 1 + 1)) * 100
t_iter = t_iter.fillna(0)
# t_iter=t_iter.merge(temp[['date','region', 'X']],how='left',on=['date','region'])
# t_iter=t_iter.rename(columns = {'X':'kalman_prediction'})
t_iter = t_iter.replace([np.inf, -np.inf], 0)
# Round the smoothed series into an integer-valued prediction column.
t_iter['kalman_prediction'] = round(t_iter['confirmed'])

# Attach population figures.  The CSV column name is literally ' Population '
# (padded, thousand-separated strings), so strip spaces/commas before the
# integer cast; missing populations default to 1.
test_iter = t_iter.merge(confirmed[['region', ' Population ']], how='left', on='region')
test_iter = test_iter.rename(columns={' Population ': 'population'})
test_iter['population'] = (test_iter['population']
                           .str.replace(r" ", '')
                           .str.replace(r",", '')
                           .fillna(1)
                           .astype('int32'))
# +1 in numerator and denominator sidesteps division by zero when the
# population is unknown (see original divide-by-zero fix note).
test_iter['infected_rate'] = (test_iter['last_day'] + 1) / (test_iter['population'] + 1) * 10000
# Join observed weather (min/max) on date + region.
test_iter = test_iter.merge(w, how='left', on=['date', 'region'])
### Rows with missing observed weather get forecast weather instead, then
### any still-missing values are extrapolated per region.
test_iter_temp = test_iter[np.isnan(test_iter['min'])]
test_iter_temp = test_iter_temp.drop(columns=['min', 'max'])
test_iter_temp = test_iter_temp.merge(w_forecast, how='left', on=['date', 'region'])
test_iter = test_iter.dropna()
test_iter = test_iter.append(test_iter_temp)
test_iter = test_iter.sort_values(['region', 'date'])

### fill missing weather
# Positions 13/14 are assumed to be the 'min'/'max' weather columns --
# TODO confirm against the merges above.
# Fixes from review: `is` compared string *identity* (use `==`), and i=0
# wrapped around to the last row via iloc[-1] -> start the loop at 1.
for i in range(1, len(test_iter)):
    if np.isnan(test_iter.iloc[i, 13]):
        if test_iter.iloc[i, 1] == test_iter.iloc[i - 1, 1]:
            # Extrapolate by +1% of the previous day's value.
            test_iter.iloc[i, 13] = test_iter.iloc[i - 1, 13] + abs(test_iter.iloc[i - 1, 13] * .01)
            test_iter.iloc[i, 14] = test_iter.iloc[i - 1, 14] + abs(test_iter.iloc[i - 1, 14] * .01)
test_iter = test_iter.fillna(0)

# Notebook-style inspection expressions (no effect when run as a script).
test_iter[test_iter.isnull().any(axis=1)]
test_iter[35620:35640]
### Predict with the trained regressor and pivot to one column per region.
pred = pd.DataFrame(reg.predict(test_iter[x_col]))
pred.columns = ['prediction']
pred = round(pred)
pred['confirmed'] = test_iter['confirmed'].values
pred['date'] = test_iter['date'].values
pred['region'] = test_iter['region'].values

# Within each region (column 3), a prediction (column 0) must not fall
# below the previous row's confirmed count (column 1).
# Fix from review: `is` compared string *identity*, not equality -> `==`.
for i in range(1, len(pred)):
    if pred.iloc[i, 3] == pred.iloc[i - 1, 3]:
        if pred.iloc[i, 0] < pred.iloc[i - 1, 1]:
            pred.iloc[i, 0] = pred.iloc[i - 1, 1]

# Duplicate (date, region) pairs would make the pivot raise
# "Index contains duplicate entries, cannot reshape"; keep the latest row.
pred = pred.drop_duplicates(subset=['date', 'region'], keep='last', inplace=False)
pred = pred.pivot(index='date', columns='region', values='prediction')  # pivot pred df
# Regions to plot below, as 'Country_Province' keys ('nan' = country-level
# series with no province).  Edit this list to change the report.
region = [
    'China_Hubei',
    'Italy_nan',
    'Portugal_nan',
    'Spain_nan',
    'Germany_nan',
    'Switzerland_nan',
    'France_nan',
    'Iran_nan',
    'Belgium_nan',
    'Poland_nan',
    'United Kingdom_nan',
    'Russia_nan',
    'India_nan',
    'Australia_New South Wales',
    'Sweden_nan',
]
# region=['China_Anhui', 'China_Beijing',
# 'China_Chongqing', 'China_Fujian', 'China_Gansu',
# 'China_Guangdong', 'China_Guangxi', 'China_Guizhou',
# 'China_Hainan', 'China_Hebei', 'China_Heilongjiang', 'China_Henan',
# 'China_Jiangsu', 'China_Jiangxi', 'China_Liaoning',
# 'China_Ningxia', 'China_Qinghai', 'China_Shaanxi',
# 'China_Shandong', 'China_Shanghai', 'China_Shanxi',
# 'China_Yunnan', 'China_Zhejiang'
# ]
### Per-region prediction plots from the pivoted `pred` frame.
# (A KeyError here means the requested column is absent from `pred` --
# i.e. the region was not in the training/prediction data.)
p = pred[region]
p.plot(marker='*', figsize=(24, 14), title='Major Areas Prediction')
p2 = pred['Italy_nan']
p2.plot(marker='o', figsize=(16, 8), title='Italy Prediction - Confirmed Cases Covid-19')
p3 = pred['Spain_nan']
p3.plot(marker='o', figsize=(16, 8), title='Spain Prediction - Confirmed Cases Covid-19')
p4 = pred['Switzerland_nan']
p4.plot(marker='o', figsize=(16, 8), title='Switzerland Prediction - Confirmed Cases Covid-19')
p5 = pred['Germany_nan']
p5.plot(marker='o', figsize=(16, 8), title='Germany Prediction - Confirmed Cases Covid-19')
p6 = pred['Portugal_nan']
p6.plot(marker='o', figsize=(16, 8), title='Portugal Prediction - Confirmed Cases Covid-19')
p8 = pred['United Kingdom_nan']
p8.plot(marker='o', figsize=(16, 8), title='United Kingdom Prediction - Confirmed Cases Covid-19')
p9 = pred['France_nan']
p9.plot(marker='o', figsize=(16, 8), title='France Prediction - Confirmed Cases Covid-19')
p10 = pred['United States_nan']
p10.plot(marker='o', figsize=(25, 12), title='United States_nan Prediction - Confirmed Cases Covid-19')
p11 = pred['China_Hubei']
p11.plot(marker='o', figsize=(16, 8), title='China - Hubei Prediction - Confirmed Cases Covid-19')
# Fixes from review: the Iran/Sweden/Russia plots reused the copy-pasted
# 'China - Hubei' title, and Russia overwrote the Sweden series in `p13`
# (Russia now lives in `p14`).
p12 = pred['Iran_nan']
p12.plot(marker='o', figsize=(16, 8), title='Iran Prediction - Confirmed Cases Covid-19')
p13 = pred['Sweden_nan']
p13.plot(marker='o', figsize=(16, 8), title='Sweden Prediction - Confirmed Cases Covid-19')
p14 = pred['Russia_nan']
p14.plot(marker='o', figsize=(16, 8), title='Russia Prediction - Confirmed Cases Covid-19')

pv1 = p  # p2.append(p3)...append(p11) in an earlier revision
p = pd.DataFrame(pv1)
p[:]  # notebook-style display; no effect as a script
# NOTE(review): the original line here was `pip install gmplot`, which is
# Jupyter shell automagic, not Python -- it is a SyntaxError when this file
# runs as a plain script.  Install the dependency beforehand
# (`pip install gmplot` in a shell) or use `%pip install gmplot` in Jupyter.
# Export the pivoted prediction frame, then build a point-cloud for the
# Google-Maps heatmap: each region's (Lat, Long) repeated in proportion to
# a predicted case count.
p.to_csv('/home/notebookuser/notebooks/covid19/p_confirmed_daily.csv', index = True)
# Import the necessary libraries
import pandas as pd
import gmplot
# For improved table display in the notebook
#from IPython.display import display
import random  # NOTE(review): unused in the visible code
heatmap=confirmed[['region','Lat','Long']]
p_m=p.T # pred.T #  transpose: one row per region, one column per date
heatmap=heatmap[heatmap['region'].isin(region)]
p_m=p_m.reset_index()
heatmap_m=heatmap.merge(p_m,how='left',on='region')
heatmap=pd.DataFrame()
# NOTE(review): vmaxni is the *row* count of heatmap_m but is used below as
# a *column* index -- presumably heatmap_m.shape[1]-1 (the last date
# column) was intended; confirm before relying on this loop.
vmaxni=len(heatmap_m)
#vmaxnii=(i+1)
# NOTE(review): `heatmap` was just reset to an empty DataFrame, so
# len(heatmap) == 0 and this loop never executes (probably meant
# range(0, len(heatmap_m))).  The weighted point-cloud is also discarded
# further below, where `heatmap` is overwritten with `heatmap_m` before
# the map is drawn -- so this block is currently dead code.
for i in range(0,len(heatmap)):
    # Skip regions whose case count in the checked column is zero.
    if heatmap_m.iloc[i,vmaxni].astype(int)==0: #### heatmap_m.iloc[i,61] # heatmap_m.iloc[i,9] columns is the date we want to check
        continue
    # Repeat the (Lat, Long) pair once per predicted case to weight the map.
    heatmap=heatmap.append(pd.concat([heatmap_m.iloc[i:(i+1),1:3]]*abs(heatmap_m.iloc[i,(i+1)].astype(int)), ignore_index=True,sort=False))
### Render the Google-Maps heatmap and write it to a dated HTML file.
from datetime import datetime

lat_series = heatmap_m['Lat']
lon_series = heatmap_m['Long']
heatmap = heatmap_m

# Initial view: centred near Switzerland (46.99474, 6.87237), zoom level 4.
plotter = gmplot.GoogleMapPlotter(46.99474, 6.87237, 4)
plotter.heatmap(lat_series, lon_series)

# Output file name carries today's date, e.g. Heatmap-2020-04-01.html.
stamp = datetime.today().strftime('%Y-%m-%d')
plotter.draw("Heatmap-" + stamp + ".html")
exit()